import requests
from lxml import etree
import con_sql
import pymysql


def main():
    """Download the statistics page and extract one record per table row.

    Table rows 8 through 39 (1-based XPath positions) are read.  For each
    row the text nodes are collected, layout-only whitespace nodes are
    dropped, and the remainder is split into a leading name part and a
    trailing block of values.

    Returns:
        list[list[str]]: one list per table row.  Element 0 is the row's
        name (its leading text fragments joined together); the following
        elements are the row's value cells as strings, in page order.

    Raises:
        requests.RequestException: if the page cannot be fetched in time
            or the server answers with an HTTP error status.
        UnicodeDecodeError: if the response body is not valid GBK.
    """
    url = "http://www.stats.gov.cn/tjsj/pcsj/rkpc/6rp/html/A0101a.htm"
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into rows.
    response.raise_for_status()
    html = response.content.decode("gbk")
    element = etree.HTML(html)

    layout_nodes = ("\r\n  ", " ")   # whitespace-only text nodes between cells
    name_padding = "\xa0\xa0\xa0 "   # indentation fragment inside the name cell
    # Every row ends with 16 value cells followed by one trailing text node
    # that is discarded; whatever precedes those 17 cells is the name.
    trailing_cells = 17

    rows = []
    for index in range(8, 40):
        texts = element.xpath(f'//*[@id=" "]/table//tr[{index}]//text()')
        cells = [text for text in texts if text not in layout_nodes]

        name_parts = [
            text
            for text in cells[:len(cells) - trailing_cells]
            if text != name_padding
        ]
        # Slice the values from the same trailing window used for the name,
        # so a name split into more or fewer than three fragments does not
        # shift the value columns.
        values = cells[-trailing_cells:-1]

        rows.append(["".join(name_parts), *values])

    return rows


def save_to_sql(fina_data):
    """Insert the given rows into the ``population_info`` MySQL table.

    The rows are echoed to stdout, then written with a single
    ``executemany`` call and committed.  The connection is always closed,
    even when the insert fails (in which case nothing is committed).

    Args:
        fina_data: sequence of rows; each row must supply the 17 values
            named in the INSERT statement, in that order.

    Raises:
        ConnectionError: if the database connection cannot be established.
        pymysql.MySQLError: if the insert or commit fails.
    """
    print(fina_data)
    try:
        con = pymysql.connect(user="root", host="localhost", passwd="as", database="gjtjj")
    except (pymysql.MySQLError, ConnectionError) as err:
        # PyMySQL reports connection failures as its own OperationalError,
        # not the builtin ConnectionError, so both must be caught here.
        raise ConnectionError("数据库连接错误!") from err

    sql = """
        INSERT INTO population_info(province, total , household, collectively, pop_total, male,
    female, sex_ratio, household_sub_total, household_male, household_female, household_sex_ratio,
    collectively_sub_total, collectively_male, collectively_female, collectively_sex_ratio, average_household)
    VALUE (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);"""

    try:
        # The cursor context manager closes the cursor on exit.
        with con.cursor() as cursor:
            cursor.executemany(sql, fina_data)
        con.commit()
    finally:
        # Release the connection on success and on failure alike.
        con.close()


if __name__ == "__main__":
    # Run con_sql's entry point before scraping
    # (presumably it prepares the database; confirm in con_sql).
    con_sql.main()

    scraped_rows = main()

    # Debug output: the type of every row, then the full result.
    for row_type in map(type, scraped_rows):
        print(row_type)
    print(scraped_rows)

    save_to_sql(scraped_rows)

